MapReduce: Finding Common Friends
A:B,C,D,F,E,O
B:A,C,E,K
C:F,A,D,I
D:A,E,F,L
E:B,C,D,M,L
F:A,B,C,D,E,O,M
G:A,C,D,E,F
H:A,C,D,E,O
I:A,O
J:B,O
K:A,C,D
L:D,E,F
M:E,F,G
O:A,H,I,J
Find every pair of people who have at least one common friend, and list who the common friends of each pair are.
For example, A-B:C,E
A-E:B,C,D
A wrong interpretation goes like this: E is in A's friend list, so A must also be in E's list, and the friends that A and E both have are their common friends. If you follow that reasoning the problem becomes simple, but it is wrong, because a pair such as A-E:B,C,D would never be found (A does not appear in E's friend list).
The correct interpretation is that we want the common friends of every pair of people; whether the two people are each other's friends, that is, whether they appear in each other's friend lists, is irrelevant.
If this were not done with MapReduce, the approach would be: split out all the people, loop over every pairwise combination, merge the two friend lists, and pick out the friends that appear twice; those are the pair's common friends. That is also how you would find common friends by hand.
In MapReduce, however, a mapper reads only one line at a time and cannot see the other lines. To bring data from different lines together, we have to choose a key so that records from different lines are grouped under it in the reduce phase; only after that grouping can one record be combined with the others.
Thinking about the problem in the following way also helps avoid the confusion:
tom:apple,pear,banana,watermelon
jerry:apple,pear
jack:banana,apple
Which pairs of people have common fruits, and what are all the common fruits of each pair? Stated this way, nobody gets confused. In real work the problem shows up as people and friends; boldly abstracting it into people and fruits is exactly the kind of step the job requires.
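To make the two-pass idea concrete before looking at the Hadoop code, here is a minimal plain-Java sketch of the same logic on the fruit data. It is not from the original post; the class and variable names are made up for illustration. Pass 1 inverts the input into fruit -> people (what job 1's map and shuffle produce), and pass 2 pairs up the people under each fruit and collects the fruits per pair (what job 2 does).

import java.util.*;

public class CommonFruitSketch {
    public static void main(String[] args) {
        // The toy input above: person -> fruits
        Map<String, List<String>> owns = new LinkedHashMap<>();
        owns.put("tom", Arrays.asList("apple", "pear", "banana", "watermelon"));
        owns.put("jerry", Arrays.asList("apple", "pear"));
        owns.put("jack", Arrays.asList("banana", "apple"));

        // Pass 1: invert to fruit -> people who have that fruit
        Map<String, List<String>> fruitToPeople = new TreeMap<>();
        for (Map.Entry<String, List<String>> e : owns.entrySet()) {
            for (String fruit : e.getValue()) {
                fruitToPeople.computeIfAbsent(fruit, k -> new ArrayList<>()).add(e.getKey());
            }
        }

        // Pass 2: for each fruit, emit every pair of its owners, then group by pair
        Map<String, Set<String>> pairToFruits = new TreeMap<>();
        for (Map.Entry<String, List<String>> e : fruitToPeople.entrySet()) {
            List<String> people = e.getValue();
            for (int i = 0; i < people.size(); i++) {
                for (int j = i + 1; j < people.size(); j++) {
                    // Order the two names so "jerry-tom" and "tom-jerry" are one key
                    String a = people.get(i), b = people.get(j);
                    String pair = a.compareTo(b) < 0 ? a + "-" + b : b + "-" + a;
                    pairToFruits.computeIfAbsent(pair, k -> new TreeSet<>()).add(e.getKey());
                }
            }
        }

        // Prints e.g. "jerry-tom  [apple, pear]"
        pairToFruits.forEach((pair, fruits) -> System.out.println(pair + "\t" + fruits));
    }
}

Replace fruits with friends and this is exactly the shape of the two MapReduce jobs below: the grouping done here by the nested maps is done by the shuffle phase in Hadoop.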
The answer is the two MapReduce jobs below: the first job groups, under every friend, all of the people who have that friend; the second job pairs up those people and collects the friends each pair shares.
package my.hadoop.hdfs.findFriend;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Job 1: for every friend, collect all of the people who have that friend.
public class FindCommonFriendOne {

    // Mapper: each input line looks like "person:friend1,friend2,...".
    // Emit (friend, person) so that the shuffle groups people by shared friend.
    public static class FindFriendMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // Convert the text handed to us by the map task into a String
            String line = value.toString();
            // Split off the person (QQ id) from their friend list
            String[] qqAndFriend = line.split(":");
            String qq = qqAndFriend[0];
            String[] friends = qqAndFriend[1].split(",");
            for (String friend : friends) {
                // key = the friend, value = the person who has this friend
                context.write(new Text(friend), new Text(qq));
            }
        }
    }

    // Reducer: concatenate all the people who share the same friend.
    public static class FindFriendReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuilder qqs = new StringBuilder();
            for (Text val : values) {
                qqs.append(val.toString()).append(",");
            }
            context.write(key, new Text(qqs.toString()));
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(FindCommonFriendOne.class);
        job.setMapperClass(FindFriendMapper.class);
        job.setReducerClass(FindFriendReducer.class);
        // Declare the key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
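Assuming the classes are packaged into a jar, the first job can be submitted along these lines (the jar name findfriend.jar and the HDFS input path /findfriend/input are assumptions; only the output directory /findfriend/output/tes0 is taken from the results shown further down):

hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendOne \
    /findfriend/input /findfriend/output/tes0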
package my.hadoop.hdfs.findFriend;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Job 2: pair up the people under every shared friend and collect, for each pair,
// all of the friends they have in common.
public class FindCommonFriendTwo {

    // Mapper: the input is job 1's output, one line per friend: "friend<TAB>personA,personB,...".
    // Emit every pair of people in the list, with the friend as the value.
    public static class FindFriendMapper extends Mapper<LongWritable, Text, Text, Text> {

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            String line = value.toString();
            // Split off the friend from the list of people who have this friend
            String[] friendAndQQ = line.split("\t");
            String friend = friendAndQQ[0];
            String[] qqs = friendAndQQ[1].split(",");
            for (int i = 0; i < qqs.length; i++) {
                for (int j = i + 1; j < qqs.length; j++) {
                    // Order the pair consistently so A-D and D-A become the same key
                    if (qqs[i].compareTo(qqs[j]) > 0) {
                        context.write(new Text(qqs[i] + "-" + qqs[j]), new Text(friend));
                    } else {
                        context.write(new Text(qqs[j] + "-" + qqs[i]), new Text(friend));
                    }
                }
            }
        }
    }

    // Reducer: collect all the common friends of one pair, skipping duplicates.
    public static class FindFriendReducer extends Reducer<Text, Text, Text, Text> {

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            StringBuffer friends = new StringBuffer();
            for (Text val : values) {
                if (friends.indexOf(val.toString()) < 0) {
                    friends.append(val).append(",");
                }
            }
            context.write(key, new Text(friends.toString()));
        }
    }

    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(FindCommonFriendTwo.class);
        job.setMapperClass(FindFriendMapper.class);
        job.setReducerClass(FindFriendReducer.class);
        // Declare the key/value types of the final output
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.setInputPaths(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        boolean res = job.waitForCompletion(true);
        System.exit(res ? 0 : 1);
    }
}
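The second job reads the first job's output directory. Under the same assumption about the jar name, it would be run like this (tes0 and tes2 match the cat commands below):

hadoop jar findfriend.jar my.hadoop.hdfs.findFriend.FindCommonFriendTwo \
    /findfriend/output/tes0 /findfriend/output/tes2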
[hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes0/*
A I,K,C,B,G,F,H,O,D,
B A,F,J,E,
C A,E,B,H,F,G,K,
D G,C,K,A,L,F,E,H,
E G,M,L,H,A,F,B,D,
F L,M,D,C,G,A,
G M,
H O,
I O,C,
J O,
K B,
L D,E,
M E,F,
O A,H,I,J,F,
[hadoop@mini2 study]$ hadoop fs -cat /findfriend/output/tes2/*
B-A E,C,
C-A F,D,
C-B A,
D-A E,F,
D-B A,E,
D-C F,A,
E-A D,C,B,
E-B C,
E-C D,
E-D L,
F-A C,O,D,E,B,
F-B C,A,E,
F-C A,D,
F-D E,A,
F-E C,B,M,D,
G-A E,D,C,F,
G-B E,A,C,
G-C D,F,A,
G-D A,E,F,
G-E D,C,
G-F C,A,E,D,
H-A O,E,C,D,
H-B E,C,A,
H-C D,A,
H-D E,A,
H-E C,D,
H-F C,D,A,E,O,
H-G C,A,E,D,
I-A O,
I-B A,
I-C A,
I-D A,
I-F A,O,
I-G A,
I-H A,O,
J-A B,O,
J-E B,
J-F O,B,
J-H O,
J-I O,
K-A D,C,
K-B A,C,
K-C D,A,
K-D A,
K-E C,D,
K-F D,C,A,
K-G D,C,A,
K-H C,D,A,
K-I A,
L-A E,D,F,
L-B E,
L-C D,F,
L-D F,E,
L-E D,
L-F D,E,
L-G E,F,D,
L-H E,D,
L-K D,
M-A F,E,
M-B E,
M-C F,
M-D F,E,
M-F E,
M-G E,F,
M-H E,
M-L E,F,
O-B A,
O-C I,A,
O-D A,
O-F A,
O-G A,
O-H A,
O-I A,
O-K A,